# The data have been downloaded from http://www.stats.ox.ac.uk/~snijders/siena/Glasgow_data.htm
# and unzipped into the working directory.
# Create a tidy data frame for Session 2. ####
# Load and count friendship nominations.
# Dichotomize friendship scores.
#
# Scores 1 and 2 both count as a friendship nomination and are recoded to 1;
# every other score (including NA) is recoded to 0.
#
# x: vector of friendship scores.
# Returns a numeric vector of 0/1 values with the same length as x.
# as.numeric() keeps the result numeric even for zero-length input, where
# ifelse() would return logical(0).
friend_dich <- function(x) {
  as.numeric(x %in% c(1, 2))
}
# Count the friendship nominations each student receives in each wave.
# Result: one row per wave and student with the count in `friendships`.
# NOTE(review): friendship.1, friendship.2 and friendship.3 are not created
# in this script; they must be loaded before this point (presumably from the
# downloaded friendship .RData file -- confirm the file name).
friendships <- rbind(friendship.1, friendship.2, friendship.3) %>%
  # change into a tibble; the row names become the column `student`
  # (as_tibble() replaces the deprecated broom::fix_data_frame())
  as_tibble(rownames = "student") %>%
  # add wave indicator: the stacked matrices contribute 160 rows each
  mutate(wave = rep(c("t1", "t2", "t3"), each = 160)) %>%
  # dichotomize all friendship variables
  mutate_at(vars(s001:s160), friend_dich) %>%
  # count number of friendship nominations received per wave; summarising
  # drops the row-name column, so `student` can be reused below
  group_by(wave) %>%
  summarise_at(vars(s001:s160), sum) %>%
  # stack students: the column names s001..s160 become values of `student`
  pivot_longer(
    cols = s001:s160,
    names_to = "student",
    values_to = "friendships"
  )
# Load demographic variables: age and sex.
# Load substance use matrices: alcohol, cannabis, tobacco.
# Load various data: familysmoking, money, romantic.
# NOTE(review): the load() calls for these objects are not present in this
# script; the objects must exist before this point (presumably from the
# downloaded Glasgow .RData files -- confirm the file names).
# Create data frame.
Glasgow <- cbind( # create matrix: 2 single columns + 6 blocks of 3 columns
  age = age,
  sex = sex.F,
  alcohol = alcohol,
  cannabis = cannabis,
  tobacco = tobacco,
  familysmoking = familysmoking,
  money = money,
  romantic = romantic
) %>%
  # name the 20 columns; repeated measurements are named <variable>_<wave>
  # so that they can be split on "_" when stacking
  magrittr::set_colnames(c(
    "age", "sex",
    "alcohol_t1", "alcohol_t2", "alcohol_t3",
    "cannabis_t1", "cannabis_t2", "cannabis_t3",
    "tobacco_t1", "tobacco_t2", "tobacco_t3",
    "smoking_at_home", "smoking_parents", "smoking_siblings",
    "money_t1", "money_t2", "money_t3",
    "romantic_t1", "romantic_t2", "romantic_t3"
  )) %>%
  # change into a tibble; the row names become the new variable `student`
  # (as_tibble() replaces the deprecated broom::fix_data_frame())
  as_tibble(rownames = "student") %>%
  # stack repeated measurements: one row per student and wave
  pivot_longer(
    cols = -c(
      student, age, sex,
      smoking_at_home, smoking_parents, smoking_siblings
    ),
    names_to = c(".value", "wave"),
    names_sep = "_"
  ) %>%
  # recode values
  mutate(
    sex = recode(sex, `1` = "boy", `2` = "girl"),
    smoking_at_home = recode(smoking_at_home, `1` = "no", `2` = "yes"),
    smoking_parents = recode(smoking_parents, `1` = "no", `2` = "yes"),
    smoking_siblings = recode(smoking_siblings, `1` = "no", `2` = "yes"),
    alcohol = recode(alcohol,
      `1` = "1 none", `2` = "2 once or twice a year", `3` = "3 once a month",
      `4` = "4 once a week", `5` = "5 more than once a week"
    ),
    cannabis = recode(cannabis,
      `1` = "1 none", `2` = "2 tried once", `3` = "3 occasional",
      `4` = "4 regular"
    ),
    tobacco = recode(tobacco,
      `1` = "1 none", `2` = "2 occasional", `3` = "3 regular"
    ),
    romantic = recode(romantic, `1` = "no", `2` = "yes")
  ) %>%
  # add friendships count
  left_join(friendships, by = c("wave", "student")) %>%
  # sorted by money (rather random)
  arrange(money, friendships)
# save as .RData for inclusion in the package
save(Glasgow, file = "Glasgow.RData")
# clean up the workspace (Glasgow itself is removed too; it lives on in the
# saved file)
rm(alcohol, cannabis, familysmoking, friendship.1, friendship.2, friendship.3, friendships, money, romantic, tobacco, age, sex.F, friend_dich, Glasgow)
# Create an untidy data frame for Session 3. ####
# Start with the tidy table of Session 2.
# Load table (Glasgow was saved to this file and then removed above).
load("Glasgow.RData")
# Create virtual neighbourhoods and add to student with distance to school
# category.
# NOTE(review): distance.1, distance.2, distance.3 and dist.school are not
# created in this script; they must be loaded before this point (presumably
# from the downloaded geographic .RData file -- confirm the file name).
# distance.1 is student by student matrix with distances at wave 1
# replace NA by the value in distance.2, else by the value in distance.3,
# else by the overall mean of distance.1
dist1 <- ifelse(
  is.na(distance.1),
  ifelse(
    is.na(distance.2),
    ifelse(is.na(distance.3), mean(distance.1, na.rm = TRUE), distance.3),
    distance.2
  ),
  distance.1
)
clustering <- hclust(dist(dist1), method = "ward.D")
# cut tree into 4 clusters (one contains missings)
neighbourhood <- cutree(clustering, k = 4)
# draw dendrogram with red borders around the 4 clusters; rect.hclust() adds
# to an existing plot, so the dendrogram has to be drawn first
plot(clustering)
rect.hclust(clustering, k = 4, border = "red")
# add distance from school per group at wave 1 and group label to tidy frame
GlasgowFriends <- as_tibble(
  cbind(neighbourhood, dist.school),
  rownames = "student"
) %>%
  select(student, neighbourhood, school.dist = l1) %>%
  # average distance to school within each neighbourhood
  group_by(neighbourhood) %>%
  mutate(schooldist = round(mean(school.dist, na.rm = TRUE), 2)) %>%
  mutate(
    # only clusters 1-3 get a label; any other cluster becomes NA
    hoodname = case_when(
      neighbourhood == 1 ~ "Neighbourhood A",
      neighbourhood == 2 ~ "Neighbourhood B",
      neighbourhood == 3 ~ "Neighbourhood C",
      TRUE ~ NA_character_
    ),
    # the mean of an all-missing group is NaN; store it as NA instead
    schooldist = ifelse(is.nan(schooldist), NA_real_, schooldist)
  ) %>%
  # drop the grouping so it does not leak into later steps
  ungroup() %>%
  select(-school.dist) %>%
  # keep every row of the tidy table (one per student and wave)
  right_join(Glasgow, by = "student")
rm(angle.1, angle.2, angle.3, clustering, dist.school, dist1, distance.1, distance.2, distance.3, Glasgow, neighbourhood)
# Add nominated friends (student codes).
# Build a long table with one row per friendship nomination:
# student (nominator), wave, nominated (student code) and nomination (score).
# NOTE(review): friendship.1, friendship.2 and friendship.3 were removed
# earlier in this script; they must be loaded again before this point.
friendships <- as_tibble(
  # stack the three waves on top of each other
  rbind(friendship.1, friendship.2, friendship.3),
  # keep the row names as the nominating student
  rownames = "student"
) %>%
  # wave indicator: 160 rows per stacked matrix
  mutate(wave = rep(c("t1", "t2", "t3"), each = 160)) %>%
  # one row per (student, nominated) pair
  pivot_longer(
    s001:s160,
    names_to = "nominated",
    values_to = "nomination"
  ) %>%
  # keep actual nominations only (scores 1 and 2); this drops scores 0 and 10
  filter(nomination %in% c(1, 2))
# add friendship nominations: one column friend_<n> per nominated friend
GlasgowFriends <- friendships %>%
  # add rank number per student within each wave
  group_by(wave, student) %>%
  # best friends (coded 1) as first nomination(s)
  arrange(nomination, .by_group = TRUE) %>%
  mutate(nominnr = row_number()) %>%
  # delete superfluous variable
  select(-nomination) %>%
  # spread: nominated student codes go to columns friend_1, friend_2, ...
  pivot_wider(
    names_from = nominnr,
    names_prefix = "friend_",
    values_from = nominated
  ) %>%
  # add to base file
  full_join(GlasgowFriends, by = c("student", "wave")) %>%
  # reorder variables (and drop friendships)
  select(student, neighbourhood:romantic, wave:friend_6) %>%
  # end the pipeline here and drop the (wave, student) grouping
  ungroup()
# add best friend (if any) with first and last wave
GlasgowFriends <- friendships %>%
  # select best friends (nomination score 1)
  filter(nomination == 1) %>%
  # get first and last wave of best friendship; the wave labels t1..t3 sort
  # alphabetically, so min()/max() give the earliest/latest wave
  group_by(student, nominated) %>%
  summarise(from = min(wave), to = max(wave)) %>%
  # one row per period marker ("from" / "to") with the corresponding wave
  pivot_longer(from:to, names_to = "bfperiod", values_to = "bfwave") %>%
  # add to base file
  full_join(GlasgowFriends, by = "student") %>%
  # reorder and rename
  select(
    student, neighbourhood:friend_6,
    bestfriend = nominated, bfperiod, bfwave
  ) %>%
  # remove all grouping info
  ungroup()
# clean up the workspace; this must be a separate statement, not a pipe step
rm(friendship.1, friendship.2, friendship.3, friendships)
# save as .RData for inclusion in the package
save(GlasgowFriends, file = "GlasgowFriends.RData")
